@@ -118,39 +118,48 @@ class TTxState : public TAtomicRefCount<TTxState> {
118118 const TString PoolId;
119119 const double MemoryPoolPercent;
120120 const TString Database;
121+ const bool CollectBacktrace;
121122
122123private:
123124 std::atomic<ui64> TxScanQueryMemory = 0 ;
124125 std::atomic<ui64> TxExternalDataQueryMemory = 0 ;
125126 std::atomic<ui32> TxExecutionUnits = 0 ;
126- std::atomic<ui64> TxMaxAllocation = 0 ;
127- std::atomic<ui64> TxFailedAllocation = 0 ;
127+ std::atomic<ui64> TxMaxAllocationSize = 0 ;
128128
129129 // TODO(ilezhankin): it's better to use std::atomic<std::shared_ptr<>> which is not supported at the moment.
130130 std::atomic<TBackTrace*> TxMaxAllocationBacktrace = nullptr ;
131- std::atomic<TBackTrace*> TxFailedAllocationBacktrace = nullptr ;
131+
132+ // NOTE: it's hard to maintain atomic pointer in case of tracking the last failed allocation backtrace,
133+ // because while we try to print one - the new last may emerge and delete previous.
134+ mutable std::mutex BacktraceMutex;
135+ std::atomic<ui64> TxFailedAllocationSize = 0 ; // protected by BacktraceMutex (only if CollectBacktrace == true)
136+ TBackTrace TxFailedAllocationBacktrace; // protected by BacktraceMutex
137+ std::atomic<bool > HasFailedAllocationBacktrace = false ;
132138
133139public:
134140 explicit TTxState (ui64 txId, TInstant now, TIntrusivePtr<TKqpCounters> counters, const TString& poolId, const double memoryPoolPercent,
135- const TString& database)
141+ const TString& database, bool collectBacktrace )
136142 : TxId(txId)
137143 , CreatedAt(now)
138144 , Counters(std::move(counters))
139145 , PoolId(poolId)
140146 , MemoryPoolPercent(memoryPoolPercent)
141147 , Database(database)
148+ , CollectBacktrace(collectBacktrace)
142149 {}
143150
144151 ~TTxState () {
145152 delete TxMaxAllocationBacktrace.load ();
146- delete TxFailedAllocationBacktrace.load ();
147153 }
148154
149155 std::pair<TString, TString> MakePoolId () const {
150156 return std::make_pair (Database, PoolId);
151157 }
152158
153- TString ToString (bool verbose = false ) const {
159+ TString ToString () const {
160+ // use unique_lock to safely unlock mutex in case of exceptions
161+ std::unique_lock backtraceLock (BacktraceMutex, std::defer_lock);
162+
154163 auto res = TStringBuilder () << " TxResourcesInfo { "
155164 << " TxId: " << TxId
156165 << " , Database: " << Database;
@@ -160,19 +169,28 @@ class TTxState : public TAtomicRefCount<TTxState> {
160169 << " , MemoryPoolPercent: " << Sprintf (" %.2f" , MemoryPoolPercent > 0 ? MemoryPoolPercent : 100 );
161170 }
162171
172+ if (CollectBacktrace) {
173+ backtraceLock.lock ();
174+ }
175+
163176 res << " , tx initially granted memory: " << HumanReadableSize (TxExternalDataQueryMemory.load (), SF_BYTES)
164177 << " , tx total memory allocations: " << HumanReadableSize (TxScanQueryMemory.load (), SF_BYTES)
165- << " , tx largest successful memory allocation: " << HumanReadableSize (TxMaxAllocation .load (), SF_BYTES)
166- << " , tx last failed memory allocation: " << HumanReadableSize (TxFailedAllocation .load (), SF_BYTES)
178+ << " , tx largest successful memory allocation: " << HumanReadableSize (TxMaxAllocationSize .load (), SF_BYTES)
179+ << " , tx last failed memory allocation: " << HumanReadableSize (TxFailedAllocationSize .load (), SF_BYTES)
167180 << " , tx total execution units: " << TxExecutionUnits.load ()
168181 << " , started at: " << CreatedAt
169182 << " }" << Endl;
170183
171- if (verbose && TxMaxAllocationBacktrace.load ()) {
172- res << " TxMaxAllocationBacktrace:" << Endl << TxMaxAllocationBacktrace.load ()->PrintToString ();
184+ if (CollectBacktrace && HasFailedAllocationBacktrace.load ()) {
185+ res << " TxFailedAllocationBacktrace:" << Endl << TxFailedAllocationBacktrace.PrintToString ();
186+ }
187+
188+ if (CollectBacktrace) {
189+ backtraceLock.unlock ();
173190 }
174- if (verbose && TxFailedAllocationBacktrace.load ()) {
175- res << " TxFailedAllocationBacktrace:" << Endl << TxFailedAllocationBacktrace.load ()->PrintToString ();
191+
192+ if (CollectBacktrace && TxMaxAllocationBacktrace.load ()) {
193+ res << " TxMaxAllocationBacktrace:" << Endl << TxMaxAllocationBacktrace.load ()->PrintToString ();
176194 }
177195
178196 return res;
@@ -183,22 +201,19 @@ class TTxState : public TAtomicRefCount<TTxState> {
183201 }
184202
185203 void AckFailedMemoryAlloc (ui64 memory) {
186- auto * oldBacktrace = TxFailedAllocationBacktrace.load ();
187- ui64 lastAlloc = TxFailedAllocation.load ();
188- bool exchanged = false ;
204+ // use unique_lock to safely unlock mutex in case of exceptions
205+ std::unique_lock backtraceLock (BacktraceMutex, std::defer_lock);
189206
190- while (!exchanged ) {
191- exchanged = TxFailedAllocation. compare_exchange_weak (lastAlloc, memory );
207+ if (CollectBacktrace ) {
208+ backtraceLock. lock ( );
192209 }
193210
194- if (exchanged) {
195- auto * newBacktrace = new TBackTrace ();
196- newBacktrace->Capture ();
197- if (TxFailedAllocationBacktrace.compare_exchange_strong (oldBacktrace, newBacktrace)) {
198- delete oldBacktrace;
199- } else {
200- delete newBacktrace;
201- }
211+ TxFailedAllocationSize = memory;
212+
213+ if (CollectBacktrace) {
214+ TxFailedAllocationBacktrace.Capture ();
215+ HasFailedAllocationBacktrace = true ;
216+ backtraceLock.unlock ();
202217 }
203218 }
204219
@@ -239,17 +254,18 @@ class TTxState : public TAtomicRefCount<TTxState> {
239254 }
240255
241256 auto * oldBacktrace = TxMaxAllocationBacktrace.load ();
242- ui64 maxAlloc = TxMaxAllocation .load ();
257+ ui64 maxAlloc = TxMaxAllocationSize .load ();
243258 bool exchanged = false ;
244259
245260 while (maxAlloc < resources.Memory && !exchanged) {
246- exchanged = TxMaxAllocation .compare_exchange_weak (maxAlloc, resources.Memory );
261+ exchanged = TxMaxAllocationSize .compare_exchange_weak (maxAlloc, resources.Memory );
247262 }
248263
249264 if (exchanged) {
250265 auto * newBacktrace = new TBackTrace ();
251266 newBacktrace->Capture ();
252267 if (TxMaxAllocationBacktrace.compare_exchange_strong (oldBacktrace, newBacktrace)) {
268+ // XXX(ilezhankin): technically it's possible to have a race with `ToString()`, but it's very unlikely.
253269 delete oldBacktrace;
254270 } else {
255271 delete newBacktrace;
0 commit comments