diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 32bcd7199b6..713dd5ec691 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -827,6 +827,7 @@ Status Query::process() { if (!only_dim_label_query()) { if (strategy_ != nullptr) { + // The strategy destructor should reset its own Stats object here dynamic_cast(strategy_.get())->stats()->reset(); strategy_ = nullptr; } @@ -917,6 +918,7 @@ Status Query::reset_strategy_with_layout( Layout layout, bool force_legacy_reader) { force_legacy_reader_ = force_legacy_reader; if (strategy_ != nullptr) { + // The strategy destructor should reset its own Stats object here dynamic_cast(strategy_.get())->stats()->reset(); strategy_ = nullptr; } @@ -1663,6 +1665,10 @@ stats::Stats* Query::stats() const { return stats_; } +void Query::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); +} + shared_ptr Query::rest_scratch() const { return rest_scratch_; } diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index c5c8ecba2db..e54c0342f90 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -655,6 +655,14 @@ class Query { /** Returns the internal stats object. */ stats::Stats* stats() const; + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); + /** Returns the scratch space used for REST requests. */ shared_ptr rest_scratch() const; diff --git a/tiledb/sm/query/strategy_base.cc b/tiledb/sm/query/strategy_base.cc index 393a1bff0c6..09406c734de 100644 --- a/tiledb/sm/query/strategy_base.cc +++ b/tiledb/sm/query/strategy_base.cc @@ -65,6 +65,10 @@ stats::Stats* StrategyBase::stats() const { return stats_; } +void StrategyBase::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); +} + /* ****************************** */ /* PROTECTED METHODS */ /* ****************************** */ diff --git a/tiledb/sm/query/strategy_base.h b/tiledb/sm/query/strategy_base.h index 91bc7194640..b5d1abb983a 100644 --- a/tiledb/sm/query/strategy_base.h +++ b/tiledb/sm/query/strategy_base.h @@ -208,6 +208,14 @@ class StrategyBase { /** Returns `stats_`. */ stats::Stats* stats() const; + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); + /** Returns the configured offsets format mode. */ std::string offsets_mode() const; diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 75162c69347..92414cd51d5 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -88,7 +88,7 @@ namespace serialization { shared_ptr dummy_logger = make_shared(HERE(), ""); -Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) { +void stats_to_capnp(const Stats& stats, capnp::Stats::Builder* stats_builder) { // Build counters const auto counters = stats.counters(); if (counters != nullptr && !counters->empty()) { @@ -114,31 +114,29 @@ Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) { ++index; } } - - return Status::Ok(); } -Status stats_from_capnp( - const capnp::Stats::Reader& stats_reader, Stats* stats) { +StatsData stats_from_capnp(const capnp::Stats::Reader& stats_reader) { + std::unordered_map counters; + std::unordered_map timers; + if (stats_reader.hasCounters()) { - auto counters = stats->counters(); auto counters_reader = stats_reader.getCounters(); for (const auto entry : counters_reader.getEntries()) { auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()}; - (*counters)[std::string{key}] = entry.getValue(); + counters[std::string(key)] = entry.getValue(); } } if (stats_reader.hasTimers()) { - auto timers = stats->timers(); auto timers_reader = stats_reader.getTimers(); for (const auto entry : timers_reader.getEntries()) { auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()}; - (*timers)[std::string{key}] = entry.getValue(); + timers[std::string(key)] = entry.getValue(); } } - return Status::Ok(); + return stats::StatsData(counters, timers); } void range_buffers_to_capnp( @@ -246,11 +244,9 @@ Status subarray_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = subarray->stats(); - if (stats != nullptr) { - auto stats_builder = builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = subarray->stats(); + auto stats_builder = builder->initStats(); + stats_to_capnp(stats, &stats_builder); if (subarray->relevant_fragments().relevant_fragments_size() > 0) { auto relevant_fragments_builder = builder->initRelevantFragments( @@ -328,11 +324,8 @@ Status subarray_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader.hasStats()) { - stats::Stats* stats = subarray->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader.getStats()); + subarray->set_stats(stats_data); } if (reader.hasRelevantFragments()) { @@ -428,11 +421,9 @@ Status subarray_partitioner_to_capnp( builder->setMemoryBudgetValidity(mem_budget_validity); // If stats object exists set its cap'n proto object - stats::Stats* stats = partitioner.stats(); - if (stats != nullptr) { - auto stats_builder = builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = partitioner.stats(); + auto stats_builder = builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -566,11 +557,8 @@ Status subarray_partitioner_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader.hasStats()) { - auto stats = partitioner->stats(); - // We should always have stats - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader.getStats()); + partitioner->set_stats(stats_data); } return Status::Ok(); @@ -913,11 +901,9 @@ Status reader_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -947,11 +933,9 @@ Status index_reader_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -982,11 +966,9 @@ Status dense_reader_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -1146,11 +1128,8 @@ Status reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1187,11 +1166,8 @@ Status index_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1229,11 +1205,8 @@ Status dense_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1252,11 +1225,8 @@ Status delete_from_capnp( // If cap'n proto object has stats set it on c++ object if (delete_reader.hasStats()) { - stats::Stats* stats = delete_strategy->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(delete_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(delete_reader.getStats()); + delete_strategy->set_stats(stats_data); } return Status::Ok(); @@ -1273,11 +1243,9 @@ Status delete_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = delete_strategy.stats(); - if (stats != nullptr) { - auto stats_builder = delete_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *delete_strategy.stats(); + auto stats_builder = delete_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -1301,11 +1269,9 @@ Status writer_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = writer.stats(); - if (stats != nullptr) { - auto stats_builder = writer_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *writer.stats(); + auto stats_builder = writer_builder->initStats(); + stats_to_capnp(stats, &stats_builder); if (query.layout() == Layout::GLOBAL_ORDER) { auto& global_writer = dynamic_cast(writer); @@ -1341,11 +1307,8 @@ Status writer_from_capnp( // If cap'n proto object has stats set it on c++ object if (writer_reader.hasStats()) { - stats::Stats* stats = writer->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(writer_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(writer_reader.getStats()); + writer->set_stats(stats_data); } if (query.layout() == Layout::GLOBAL_ORDER && @@ -1557,10 +1520,10 @@ Status query_to_capnp( RETURN_NOT_OK(config_to_capnp(query.config(), &config_builder)); // If stats object exists set its cap'n proto object - stats::Stats* stats = query.stats(); - if (stats != nullptr) { + auto stats = query.stats(); + if (stats) { auto stats_builder = query_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); + stats_to_capnp(*stats, &stats_builder); } auto& written_fragment_info = query.get_written_fragment_info(); @@ -2270,11 +2233,8 @@ Status query_from_capnp( // If cap'n proto object has stats set it on c++ object if (query_reader.hasStats()) { - stats::Stats* stats = query->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(query_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(query_reader.getStats()); + query->set_stats(stats_data); } if (query_reader.hasWrittenFragmentInfo()) { @@ -3180,11 +3140,9 @@ void ordered_dim_label_reader_to_capnp( query.dimension_label_increasing_order()); // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - throw_if_not_ok(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); } void ordered_dim_label_reader_from_capnp( @@ -3212,11 +3170,8 @@ void ordered_dim_label_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - throw_if_not_ok(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } } diff --git a/tiledb/sm/stats/global_stats.h b/tiledb/sm/stats/global_stats.h index 7d5635ef483..66d9e3dfab5 100644 --- a/tiledb/sm/stats/global_stats.h +++ b/tiledb/sm/stats/global_stats.h @@ -49,6 +49,71 @@ #include "tiledb/common/heap_memory.h" #include "tiledb/sm/stats/stats.h" +/** + * Documenting the current stats behavior and architecture as close as + * possible to the code so it's helpful next time someone tries to refactor. + * + * Statistics collection is done at the top level via the GlobalStats class + * defined in this file. + * We maintain a global object called `all_stats` which is used to register + * Stats objects, enable/disable collection, reset or dumping the collected + * stats. + * + * The TileDB C API uses the `all_stats` object directly to execute the + * actions iterated above. + * + * The GlobalStats class owns a list called `registered_stats` that has one + * Stats object registered for each Context used. In consequence, + * ContextResources register a Stats object for each Context created, this + * object serves as the root for the tree of all children Stats used in a + * Context. + * + * As mentioned above, the Stats objects used under a Context form a tree. + * Each Stats object mentains a list of children Stats and a pointer to the + * parent Stats object. + * The Stats object created by ContextResources(named "Context.StorageManager") + * is the only Stats constructed in a standalone fashion using the Stats + * constructor, all the other objects under this root Stats are created via + * the Stats::create_child API. + * + * The (current, please update if not accurate anymore) exhaustive list of + * Stats we maintain under a Context is: + * --------------------------- + * ContextResources + * - Query + * - Reader + * - Writer + * - DenseTiler + * - Subarray + * - Deletes + * - Subarray + * - subSubarray + * - SubarrayPartitioner + * - VFS + * - S3 + * - ArrayDirectory + * - RestClient + * - Consolidator + * --------------------------- + * Please visualize this as a tree, it was much easier to write + * like this, the tree is too wide. + * + * + * Observed issues: + * - Stats objects currently are created via Stats::create_child API from a + * parent stats object. Child objects such as e.g. Subarray only hold a + * pointer to the Stats object, this means that the Stats objects outlive + * the objects they represent and are kept alive by the tree like structure + * defined by the Stats class. + * In theory, a Context running for a long time would OOM the machine with + * Stats objects. + * + * - Stats::populate_flattened_stats aggregate the collected statistic via + * summing. But we also collect ".min", ".max" statistics as well, + * sum-aggregating those is incorrect. Currently the dump function just + * doesn't print those statistics. + */ + namespace tiledb { namespace sm { namespace stats { @@ -158,8 +223,7 @@ class GlobalStats { /* ********************************* */ /** - * The singleton instance holding all global stats counters. The report will - * be automatically made when this object is destroyed (at program termination). + * The singleton instance holding all global stats counters and timers. */ extern GlobalStats all_stats; diff --git a/tiledb/sm/stats/stats.cc b/tiledb/sm/stats/stats.cc index 5773c312b58..82de3ed2549 100644 --- a/tiledb/sm/stats/stats.cc +++ b/tiledb/sm/stats/stats.cc @@ -48,9 +48,14 @@ namespace stats { /* ****************************** */ Stats::Stats(const std::string& prefix) + : Stats(prefix, StatsData{}) { +} + +Stats::Stats(const std::string& prefix, const StatsData& data) : enabled_(true) , prefix_(prefix + ".") , parent_(nullptr) { + this->populate_with_data(data); } /* ****************************** */ @@ -246,8 +251,12 @@ Stats* Stats::parent() { } Stats* Stats::create_child(const std::string& prefix) { + return create_child(prefix, StatsData{}); +} + +Stats* Stats::create_child(const std::string& prefix, const StatsData& data) { std::unique_lock lck(mtx_); - children_.emplace_back(prefix_ + prefix); + children_.emplace_back(prefix_ + prefix, data); Stats* const child = &children_.back(); child->parent_ = this; return child; @@ -272,15 +281,26 @@ void Stats::populate_flattened_stats( } } -std::unordered_map* Stats::timers() { +const std::unordered_map* Stats::timers() const { return &timers_; } /** Return pointer to conters map, used for serialization only. */ -std::unordered_map* Stats::counters() { +const std::unordered_map* Stats::counters() const { return &counters_; } +void Stats::populate_with_data(const StatsData& data) { + auto& timers = data.timers(); + for (const auto& timer : timers) { + timers_[timer.first] = timer.second; + } + auto& counters = data.counters(); + for (const auto& counter : counters) { + counters_[counter.first] = counter.second; + } +} + } // namespace stats } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/stats/stats.h b/tiledb/sm/stats/stats.h index 018ed721f63..47691c8d294 100644 --- a/tiledb/sm/stats/stats.h +++ b/tiledb/sm/stats/stats.h @@ -51,6 +51,58 @@ namespace tiledb { namespace sm { namespace stats { +/** + * Class that holds measurement data that Stats objects can be + * initialized with. + */ +class StatsData { + public: + /* ****************************** */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ****************************** */ + + /* Default constructor */ + StatsData() = default; + + /** + * Value constructor. + * + * @param counters A map of counters + * @param timers A map of timers + */ + StatsData( + std::unordered_map& counters, + std::unordered_map& timers) + : counters_(counters) + , timers_(timers) { + } + + /* ****************************** */ + /* API */ + /* ****************************** */ + + /** Get a reference to internal counters */ + const std::unordered_map& counters() const { + return counters_; + } + + /** Get a reference to internal timers */ + const std::unordered_map& timers() const { + return timers_; + } + + private: + /* ****************************** */ + /* PRIVATE ATTRIBUTES */ + /* ****************************** */ + + /** Map of counters and values */ + std::unordered_map counters_; + + /** Map of timers and values */ + std::unordered_map timers_; +}; + /** * Class that defines stats counters and methods to manipulate them. */ @@ -72,6 +124,14 @@ class Stats { */ Stats(const std::string& prefix); + /** + * Value constructor. + * + * @param prefix The stat name prefix. + * @param data Initial data to populate the Stats object with. + */ + Stats(const std::string& prefix, const StatsData& data); + /** Destructor. */ ~Stats() = default; @@ -116,11 +176,29 @@ class Stats { /** Creates a child instance, managed by this instance. */ Stats* create_child(const std::string& prefix); + /** + * Creates a child instance, managed by this instance, the instance is + * constructed with initial data. + * + * @param prefix The stat name prefix. + * @param data Initial data to populate the Stats object with. + */ + Stats* create_child(const std::string& prefix, const StatsData& data); + /** Return pointer to timers map, used for serialization only. */ - std::unordered_map* timers(); + const std::unordered_map* timers() const; /** Return pointer to conters map, used for serialization only. */ - std::unordered_map* counters(); + const std::unordered_map* counters() const; + + /** + * Populate the counters and timers internal maps from a StatsData object + * Please be aware that the data is not being added up, it will override the + * existing data on the Stats object. + * + * @param data Data to populate the stats with. + */ + void populate_with_data(const StatsData& data); private: /* ****************************** */ diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 486c88b9c94..d58584717d3 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -3080,8 +3080,12 @@ RelevantFragments& Subarray::relevant_fragments() { return relevant_fragments_; } -stats::Stats* Subarray::stats() const { - return stats_; +const stats::Stats& Subarray::stats() const { + return *stats_; +} + +void Subarray::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); } tuple> Subarray::non_overlapping_ranges_for_dim( diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index a16e92b376e..84c986f61e3 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -1276,7 +1276,15 @@ class Subarray { std::vector* end_coords) const; /** Returns `stats_`. */ - stats::Stats* stats() const; + const stats::Stats& stats() const; + + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); /** Stores a vector of 1D ranges per dimension. */ std::vector> original_range_idx_; diff --git a/tiledb/sm/subarray/subarray_partitioner.cc b/tiledb/sm/subarray/subarray_partitioner.cc index db6dab81cf4..e22fbf28ac5 100644 --- a/tiledb/sm/subarray/subarray_partitioner.cc +++ b/tiledb/sm/subarray/subarray_partitioner.cc @@ -666,8 +666,12 @@ Subarray& SubarrayPartitioner::subarray() { return subarray_; } -stats::Stats* SubarrayPartitioner::stats() const { - return stats_; +const stats::Stats& SubarrayPartitioner::stats() const { + return *stats_; +} + +void SubarrayPartitioner::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); } /* ****************************** */ diff --git a/tiledb/sm/subarray/subarray_partitioner.h b/tiledb/sm/subarray/subarray_partitioner.h index 919b9a1b626..2915fd7aefe 100644 --- a/tiledb/sm/subarray/subarray_partitioner.h +++ b/tiledb/sm/subarray/subarray_partitioner.h @@ -333,7 +333,14 @@ class SubarrayPartitioner { Subarray& subarray(); /** Returns `stats_`. */ - stats::Stats* stats() const; + const stats::Stats& stats() const; + + /** Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); private: /* ********************************* */