@@ -11,7 +11,8 @@ namespace ErrorCodes
1111extern const int LOGICAL_ERROR;
1212}
1313
14- std::optional<HashTablesStatistics::Entry> HashTablesStatistics::getSizeHint (const Params & params)
14+ template <typename Entry>
15+ std::optional<Entry> HashTablesStatistics<Entry>::getSizeHint(const Params & params)
1516{
1617 if (!params.isCollectionAndUseEnabled ())
1718 throw DB::Exception (DB::ErrorCodes::LOGICAL_ERROR, " Collection and use of the statistics should be enabled." );
@@ -20,19 +21,15 @@ std::optional<HashTablesStatistics::Entry> HashTablesStatistics::getSizeHint(con
2021 const auto cache = getHashTableStatsCache (params, lock);
2122 if (const auto hint = cache->get (params.key ))
2223 {
23- LOG_TRACE (
24- getLogger (" HashTablesStatistics" ),
25- " An entry for key={} found in cache: sum_of_sizes={}, median_size={}" ,
26- params.key ,
27- hint->sum_of_sizes ,
28- hint->median_size );
24+ LOG_TRACE (getLogger (" HashTablesStatistics" ), " An entry for key={} found in cache: {}" , params.key , hint->dump ());
2925 return *hint;
3026 }
3127 return std::nullopt ;
3228}
3329
3430// / Collection and use of the statistics should be enabled.
35- void HashTablesStatistics::update (size_t sum_of_sizes, size_t median_size, const Params & params)
31+ template <typename Entry>
32+ void HashTablesStatistics<Entry>::update(const Entry & new_entry, const Params & params)
3633{
3734 if (!params.isCollectionAndUseEnabled ())
3835 throw DB::Exception (DB::ErrorCodes::LOGICAL_ERROR, " Collection and use of the statistics should be enabled." );
@@ -41,20 +38,15 @@ void HashTablesStatistics::update(size_t sum_of_sizes, size_t median_size, const
4138 const auto cache = getHashTableStatsCache (params, lock);
4239 const auto hint = cache->get (params.key );
4340 // We'll maintain the maximum among all the observed values until another prediction is much lower (that should indicate some change)
44- if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2
45- || hint->median_size < median_size)
41+ if (!hint || hint->shouldBeUpdated (new_entry))
4642 {
47- LOG_TRACE (
48- getLogger (" HashTablesStatistics" ),
49- " Statistics updated for key={}: new sum_of_sizes={}, median_size={}" ,
50- params.key ,
51- sum_of_sizes,
52- median_size);
53- cache->set (params.key , std::make_shared<Entry>(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size}));
43+ LOG_TRACE (getLogger (" HashTablesStatistics" ), " Statistics updated for key={}: {}" , params.key , new_entry.dump ());
44+ cache->set (params.key , std::make_shared<Entry>(new_entry));
5445 }
5546}
5647
57- std::optional<HashTablesCacheStatistics> HashTablesStatistics::getCacheStats () const
48+ template <typename Entry>
49+ std::optional<HashTablesCacheStatistics> HashTablesStatistics<Entry>::getCacheStats() const
5850{
5951 std::lock_guard lock (mutex);
6052 if (hash_table_stats)
@@ -67,29 +59,38 @@ std::optional<HashTablesCacheStatistics> HashTablesStatistics::getCacheStats() c
6759 return std::nullopt ;
6860}
6961
70- HashTablesStatistics::CachePtr HashTablesStatistics::getHashTableStatsCache (const Params & params, const std::lock_guard<std::mutex> &)
62+ template <typename Entry>
63+ HashTablesStatistics<Entry>::CachePtr
64+ HashTablesStatistics<Entry>::getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
7165{
7266 if (!hash_table_stats)
7367 hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats * sizeof (Entry));
7468 return hash_table_stats;
7569}
7670
77- HashTablesStatistics & getHashTablesStatistics ()
78- {
79- static HashTablesStatistics hash_tables_stats;
80- return hash_tables_stats;
81- }
82-
8371std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics ()
8472{
85- return getHashTablesStatistics ().getCacheStats ();
73+ HashTablesCacheStatistics res{};
74+ if (auto aggr_stats = getHashTablesStatistics<AggregationEntry>().getCacheStats ())
75+ {
76+ res.entries += aggr_stats->entries ;
77+ res.hits += aggr_stats->hits ;
78+ res.misses += aggr_stats->misses ;
79+ }
80+ if (auto hash_join_stats = getHashTablesStatistics<HashJoinEntry>().getCacheStats ())
81+ {
82+ res.entries += hash_join_stats->entries ;
83+ res.hits += hash_join_stats->hits ;
84+ res.misses += hash_join_stats->misses ;
85+ }
86+ return res;
8687}
8788
88- std::optional<HashTablesStatistics::Entry > getSizeHint (const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt)
89+ std::optional<AggregationEntry > getSizeHint (const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt)
8990{
9091 if (stats_collecting_params.isCollectionAndUseEnabled ())
9192 {
92- if (auto hint = DB::getHashTablesStatistics ().getSizeHint (stats_collecting_params))
93+ if (auto hint = DB::getHashTablesStatistics<AggregationEntry> ().getSizeHint (stats_collecting_params))
9394 {
9495 const auto lower_limit = hint->sum_of_sizes / tables_cnt;
9596 const auto upper_limit = stats_collecting_params.max_size_to_preallocate / tables_cnt;
@@ -109,10 +110,34 @@ std::optional<HashTablesStatistics::Entry> getSizeHint(const DB::StatsCollecting
109110 // / https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703
110111 else if ((tables_cnt > 1 && hint->sum_of_sizes > 100'000 ) || hint->sum_of_sizes > 500'000 )
111112 {
112- return HashTablesStatistics::Entry{hint->sum_of_sizes , std::max (lower_limit, hint->median_size )};
113+ return AggregationEntry{hint->sum_of_sizes , std::max (lower_limit, hint->median_size )};
114+ }
115+ }
116+ }
117+ return std::nullopt ;
118+ }
119+
120+ std::optional<HashJoinEntry> getSizeHint (const DB::StatsCollectingParams & stats_collecting_params)
121+ {
122+ if (stats_collecting_params.isCollectionAndUseEnabled ())
123+ {
124+ if (auto hint = DB::getHashTablesStatistics<HashJoinEntry>().getSizeHint (stats_collecting_params))
125+ {
126+ if (hint->ht_size > stats_collecting_params.max_size_to_preallocate )
127+ {
128+ LOG_TRACE (
129+ getLogger (" HashTablesStatistics" ),
130+ " No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, should be at "
131+ " least {}" ,
132+ stats_collecting_params.max_size_to_preallocate ,
133+ hint->ht_size );
113134 }
135+ return hint;
114136 }
115137 }
116138 return std::nullopt ;
117139}
140+
141+ template class HashTablesStatistics <AggregationEntry>;
142+ template class HashTablesStatistics <HashJoinEntry>;
118143}
0 commit comments