Skip to content

Commit 2ab4beb

Browse files
authored
Merge pull request ClickHouse#87777 from ClickHouse/backport/25.8/87584
Backport ClickHouse#87584 to 25.8: Avoid leaking of tracked memory for async logging and text_log
2 parents 1a86422 + 19b22d8 commit 2ab4beb

File tree

8 files changed

+25
-16
lines changed

8 files changed

+25
-16
lines changed

docs/en/operations/system-tables/trace_log.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ Columns:
3737
- `MemorySample` represents collecting random allocations and deallocations.
3838
- `MemoryPeak` represents collecting updates of peak memory usage.
3939
- `ProfileEvent` represents collecting of increments of profile events.
40+
- `JemallocSample` represents collecting of jemalloc samples.
41+
- `MemoryAllocatedWithoutCheck` represents collection of significant allocations (>16MiB) that is done with ignoring any memory limits (for ClickHouse developers only).
4042
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
4143
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](/operations/system-tables/query_log) system table.
4244
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.

src/Common/MemoryTracker.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,9 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
252252
if (level == VariableContext::Global)
253253
{
254254
/// For global memory tracker always update memory usage.
255-
amount.fetch_add(size, std::memory_order_relaxed);
255+
Int64 will_be = amount.fetch_add(size, std::memory_order_relaxed);
256256
rss.fetch_add(size, std::memory_order_relaxed);
257+
updatePeak(will_be, /*log_memory_usage=*/ false);
257258

258259
auto metric_loaded = metric.load(std::memory_order_relaxed);
259260
if (metric_loaded != CurrentMetrics::end())

src/Common/MemoryTrackerDebugBlockerInThread.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <base/defines.h> /// DEBUG_OR_SANITIZER_BUILD
44

5+
/// Use it if you need to suppress MemoryAllocatedWithoutCheck for known big allocations.
56
#ifdef DEBUG_OR_SANITIZER_BUILD
67
struct MemoryTrackerDebugBlockerInThread
78
{

src/Common/SystemLogBase.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <Interpreters/DeadLetterQueue.h>
3131
#include <IO/S3/BlobStorageLogWriter.h>
3232

33+
#include <Common/MemoryTrackerDebugBlockerInThread.h>
3334
#include <Common/MemoryTrackerBlockerInThread.h>
3435
#include <Common/SystemLogBase.h>
3536
#include <Common/ThreadPool.h>
@@ -65,7 +66,7 @@ SystemLogQueue<LogElement>::SystemLogQueue(const SystemLogQueueSettings & settin
6566
static thread_local bool recursive_push_call = false;
6667

6768
template <typename LogElement>
68-
void SystemLogQueue<LogElement>::push(LogElement&& element)
69+
void SystemLogQueue<LogElement>::push(LogElement && element)
6970
{
7071
/// It is possible that the method will be called recursively.
7172
/// Better to drop these events to avoid complications.
@@ -74,10 +75,11 @@ void SystemLogQueue<LogElement>::push(LogElement&& element)
7475
recursive_push_call = true;
7576
SCOPE_EXIT({ recursive_push_call = false; });
7677

77-
/// Memory can be allocated while resizing on queue.push_back.
78-
/// The size of allocation can be in order of a few megabytes.
79-
/// But this should not be accounted for query memory usage.
80-
/// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flaky.
78+
79+
/// Queue resize can allocate memory
80+
/// - MemoryTrackerDebugBlockerInThread here due to the allocation can hit the limit for MemoryAllocatedWithoutCheck, let's suppress it.
81+
/// - MemoryTrackerBlockerInThread here because this allocation should not be take into account in the query scope (since it will be freed outside of it)
82+
[[maybe_unused]] MemoryTrackerDebugBlockerInThread blocker;
8183
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
8284

8385
/// Should not log messages under mutex.
@@ -308,6 +310,8 @@ void SystemLogBase<LogElement>::stopFlushThread()
308310
template <typename LogElement>
309311
void SystemLogBase<LogElement>::add(LogElement element)
310312
{
313+
/// This allocation should not be take into account in the query scope (since it will be freed outside of it)
314+
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
311315
queue->push(std::move(element));
312316
}
313317

src/Common/logger_useful.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <Common/AtomicLogger.h>
1010
#include <Common/CurrentThreadHelpers.h>
1111
#include <Common/Logger.h>
12+
#include <Common/MemoryTrackerBlockerInThread.h>
1213
#include <Common/LoggingFormatStringHelpers.h>
1314
#include <Common/ProfileEvents.h>
1415
#include <Common/Stopwatch.h>
@@ -73,6 +74,7 @@ constexpr bool constexprContains(std::string_view haystack, std::string_view nee
7374
{ \
7475
static_assert(!constexprContains(#__VA_ARGS__, "formatWithSecretsOneLine"), "Think twice!"); \
7576
static_assert(!constexprContains(#__VA_ARGS__, "formatWithSecretsMultiLine"), "Think twice!"); \
77+
\
7678
auto _logger = ::impl::getLoggerHelper(logger); \
7779
const bool _is_clients_log = DB::currentThreadHasGroup() && DB::currentThreadLogsLevel() >= (priority); \
7880
if (!_is_clients_log && !_logger->is((PRIORITY))) \
@@ -87,6 +89,11 @@ constexpr bool constexprContains(std::string_view haystack, std::string_view nee
8789
constexpr size_t _nargs = CH_VA_ARGS_NARGS(__VA_ARGS__); \
8890
using LogTypeInfo = FormatStringTypeInfo<std::decay_t<decltype(LOG_IMPL_FIRST_ARG(__VA_ARGS__))>>; \
8991
\
92+
/* Note, we need to block memory tracking to avoid taking into account this memory in the query context */ \
93+
/* Since this memory will be freed either in OwnAsyncSplitChannel::runChannel() (in case of async logging) */ \
94+
/* Or in a system thread for flushing system.text_log (in case of sync logging) */ \
95+
MemoryTrackerBlockerInThread block_memory_tracker(VariableContext::Global); \
96+
\
9097
std::string_view _format_string; \
9198
std::string _formatted_message; \
9299
std::vector<std::string> _format_string_args; \

src/Loggers/OwnSplitChannel.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ void logToSystemTextLogQueue(
138138
void OwnSplitChannel::logSplit(
139139
const ExtendedLogMessage & msg_ext, const std::shared_ptr<InternalTextLogsQueue> & logs_queue, const std::string & msg_thread_name)
140140
{
141-
LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
142141
const Poco::Message & msg = *msg_ext.base;
143142

144143
try
@@ -393,7 +392,6 @@ void OwnAsyncSplitChannel::log(const Poco::Message & msg)
393392

394393
void OwnAsyncSplitChannel::log(Poco::Message && msg)
395394
{
396-
LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
397395
try
398396
{
399397
/// Based on logger_useful.h this won't be called if the message is not needed

src/Loggers/OwnSplitChannel.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ using TextLogQueue = SystemLogQueue<TextLogElement>;
3333
using AsyncLogQueueSize = std::pair<std::string, size_t>;
3434
using AsyncLogQueueSizes = std::vector<AsyncLogQueueSize>;
3535

36+
class ExtendedLogMessage;
37+
3638
class OwnSplitChannelBase : public Poco::Channel
3739
{
3840
public:

tests/integration/test_keeper_memory_soft_limit/test.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
#!/usr/bin/env python3
2-
import random
3-
import string
4-
52
import pytest
63
from kazoo.client import KazooClient
74
from helpers.cluster import ClickHouseCluster
@@ -18,10 +15,6 @@
1815
)
1916

2017

21-
def random_string(length):
22-
return "".join(random.choices(string.ascii_lowercase + string.digits, k=length))
23-
24-
2518
def get_connection_zk(nodename, timeout=30.0):
2619
# NOTE: here we need KazooClient without implicit retries! (KazooClientWithImplicitRetries)
2720
_fake_zk_instance = KazooClient(
@@ -64,7 +57,8 @@ def test_soft_limit_create(started_cluster):
6457

6558
txn.create("/test_soft_limit/node_1000001" + str(i), b"abcde")
6659
txn.commit()
67-
assert "0\n" == node.query("select sum(ProfileEvent_ZooKeeperHardwareExceptions) from system.metric_log")
60+
node.query("system flush logs metric_log")
61+
assert int(node.query("select sum(ProfileEvent_ZooKeeperHardwareExceptions) from system.metric_log").strip()) > 0
6862
return
6963

7064
raise Exception("all records are inserted but no error occurs")

0 commit comments

Comments
 (0)