Skip to content

Commit 27c8534

Browse files
markplesjkoritzinskymrsharm
authored
Fix GC stress log and analysis (#108655)
* Allow floats to be recorded into the stresslog on 64-bit platforms (as 64-bit floating point values) * change gc start message and some factoring * quick fix to allowing gcstart to be a prefix * StressLogAnalyzer - handle 16 format arguments and avoid buffer overruns * comment * get_str_gc_type and remove non-BGC special cases * fix * TRACE_GC off * enable by default * not always * LL_INFO1000 for 'Segment mem' message * fewer messages * fewer messages * remove sizeof * Handle %3s * comment * comment * See if stresslog.h STRESS_LOG_VA is unnecessary * _DEBUG * Revert "See if stresslog.h STRESS_LOG_VA is unnecessary" This reverts commit 50eb8bd. * redo ifdef * move a few more dprintfs to 2 * comments * Fixed missed comments --------- Co-authored-by: Jeremy Koritzinsky <[email protected]> Co-authored-by: Mukund Raghav Sharma <[email protected]>
1 parent 1e860a4 commit 27c8534

File tree

8 files changed

+181
-55
lines changed

8 files changed

+181
-55
lines changed

src/coreclr/gc/env/gcenv.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ enum LogFacility
9797
LF_GC = 0x00000001,
9898
LF_GCALLOC = 0x00000100,
9999
LF_GCROOTS = 0x00080000,
100-
LF_ALWAYS = 0x80000000,
101100
};
102101

103102
enum LogLevel
@@ -192,19 +191,25 @@ struct StressLogMsg
192191
}
193192
};
194193

195-
template<>
196-
void* StressLogMsg::ConvertArgument(float arg) = delete;
197-
198194
#if TARGET_64BIT
199195
template<>
200196
inline void* StressLogMsg::ConvertArgument(double arg)
201197
{
202198
return (void*)(size_t)(*((uint64_t*)&arg));
203199
}
200+
// COMPAT: Convert 32-bit floats to 64-bit doubles.
201+
template<>
202+
inline void* StressLogMsg::ConvertArgument(float arg)
203+
{
204+
return StressLogMsg::ConvertArgument((double)arg);
205+
}
204206
#else
205207
template<>
206208
void* StressLogMsg::ConvertArgument(double arg) = delete;
207209

210+
template<>
211+
void* StressLogMsg::ConvertArgument(float arg) = delete;
212+
208213
// COMPAT: Truncate 64-bit integer arguments to 32-bit
209214
template<>
210215
inline void* StressLogMsg::ConvertArgument(uint64_t arg)
@@ -234,7 +239,7 @@ class StressLog
234239

235240
static void LogMsg(unsigned dprintfLevel, const StressLogMsg& msg)
236241
{
237-
GCToEEInterface::LogStressMsg(LL_ALWAYS, LF_ALWAYS|(dprintfLevel<<16)|LF_GC, msg);
242+
GCToEEInterface::LogStressMsg(LL_ALWAYS, (dprintfLevel<<16)|LF_GC, msg);
238243
}
239244

240245
static void LogMsg(unsigned level, unsigned facility, const StressLogMsg& msg)

src/coreclr/gc/gc.cpp

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7463,7 +7463,7 @@ bool gc_heap::virtual_commit (void* address, size_t size, int bucket, int h_numb
74637463

74647464
if ((base + size) > limit)
74657465
{
7466-
dprintf (1, ("%zd + %zd = %zd > limit %zd ", base, size, (base + size), limit));
7466+
dprintf (2, ("%zd + %zd = %zd > limit %zd ", base, size, (base + size), limit));
74677467
exceeded_p = true;
74687468
}
74697469
}
@@ -8280,9 +8280,9 @@ uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry)
82808280
{
82818281
uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info());
82828282
uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap)));
8283-
//dprintf (1, ("detected a very short plug: %zx before PP %zx, pad %zx",
8283+
//dprintf (2, ("detected a very short plug: %zx before PP %zx, pad %zx",
82848284
// old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved));
8285-
dprintf (1, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved));
8285+
dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved));
82868286
return plug_start_in_saved;
82878287
}
82888288

@@ -13531,7 +13531,7 @@ void gc_heap::distribute_free_regions()
1353113531

1353213532
size_t total_basic_free_regions = total_num_free_regions[basic_free_region] + surplus_regions[basic_free_region].get_num_free_regions();
1353313533
total_budget_in_region_units[basic_free_region] = compute_basic_region_budgets(heap_budget_in_region_units[basic_free_region], min_heap_budget_in_region_units[basic_free_region], total_basic_free_regions);
13534-
13534+
1353513535
bool aggressive_decommit_large_p = joined_last_gc_before_oom || dt_high_memory_load_p() || near_heap_hard_limit_p();
1353613536

1353713537
int region_factor[count_distributed_free_region_kinds] = { 1, LARGE_REGION_FACTOR };
@@ -31148,7 +31148,7 @@ void gc_heap::realloc_plan_generation_start (generation* gen, generation* consin
3114831148
generation_allocation_pointer (consing_gen) += allocation_left;
3114931149
}
3115031150

31151-
dprintf (1, ("plan re-alloc gen%d start at %p (ptr: %p, limit: %p)", gen->gen_num,
31151+
dprintf (2, ("plan re-alloc gen%d start at %p (ptr: %p, limit: %p)", gen->gen_num,
3115231152
generation_plan_allocation_start (consing_gen),
3115331153
generation_allocation_pointer (consing_gen),
3115431154
generation_allocation_limit (consing_gen)));
@@ -32751,7 +32751,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c
3275132751
}
3275232752
}
3275332753

32754-
dprintf (1, ("we still need %d regions, %d will be empty", plan_regions_needed, to_be_empty_regions));
32754+
dprintf (REGIONS_LOG, ("we still need %d regions, %d will be empty", plan_regions_needed, to_be_empty_regions));
3275532755
if (plan_regions_needed > to_be_empty_regions)
3275632756
{
3275732757
dprintf (REGIONS_LOG, ("h%d %d regions will be empty but we still need %d regions!!", heap_number, to_be_empty_regions, plan_regions_needed));
@@ -34104,7 +34104,7 @@ void gc_heap::plan_phase (int condemned_gen_number)
3410434104
#else
3410534105
0;
3410634106
#endif //SHORT_PLUGS
34107-
dprintf (1, ("gen%d: NON PIN alloc: %zd, pin com: %zd, sweep: %zd, surv: %zd, pinsurv: %zd(%d%% added, %d%% art), np surv: %zd, pad: %zd",
34107+
dprintf (2, ("gen%d: NON PIN alloc: %zd, pin com: %zd, sweep: %zd, surv: %zd, pinsurv: %zd(%d%% added, %d%% art), np surv: %zd, pad: %zd",
3410834108
gen_idx,
3410934109
generation_allocation_size (temp_gen),
3411034110
generation_pinned_allocation_compact_size (temp_gen),
@@ -43932,7 +43932,7 @@ generation* gc_heap::expand_heap (int condemned_generation,
4393243932
BOOL should_promote_ephemeral = FALSE;
4393343933
ptrdiff_t eph_size = total_ephemeral_size;
4393443934
#ifdef BACKGROUND_GC
43935-
dprintf(2,("%s: ---- Heap Expansion ----", (gc_heap::background_running_p() ? "FGC" : "NGC")));
43935+
dprintf(2,("%s: ---- Heap Expansion ----", get_str_gc_type()));
4393643936
#endif //BACKGROUND_GC
4393743937
settings.heap_expansion = TRUE;
4393843938

@@ -44434,7 +44434,7 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
4443444434
{
4443544435
size_t allocated = 0;
4443644436
size_t committed = uoh_committed_size (gen_number, &allocated);
44437-
dprintf (1, ("GC#%zd h%d, GMI: UOH budget, UOH commit %zd (obj %zd, frag %zd), total commit: %zd (recorded: %zd)",
44437+
dprintf (2, ("GC#%zd h%d, GMI: UOH budget, UOH commit %zd (obj %zd, frag %zd), total commit: %zd (recorded: %zd)",
4443844438
(size_t)settings.gc_index, heap_number,
4443944439
committed, allocated,
4444044440
dd_fragmentation (dynamic_data_of (gen_number)),
@@ -44511,7 +44511,7 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
4451144511

4451244512
dd_surv (dd) = cst;
4451344513

44514-
dprintf (1, (ThreadStressLog::gcDesiredNewAllocationMsg(),
44514+
dprintf (2, (ThreadStressLog::gcDesiredNewAllocationMsg(),
4451544515
heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
4451644516
(int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
4451744517

@@ -47805,18 +47805,18 @@ void gc_heap::descr_generations (const char* msg)
4780547805
#endif //!TRACE_GC
4780647806

4780747807
#ifdef STRESS_LOG
47808-
if (StressLog::StressLogOn(LF_GC, LL_INFO10))
47808+
if (StressLog::StressLogOn(LF_GC, LL_INFO1000))
4780947809
{
4781047810
gc_heap* hp = 0;
4781147811
#ifdef MULTIPLE_HEAPS
4781247812
hp= this;
4781347813
#endif //MULTIPLE_HEAPS
4781447814

47815-
STRESS_LOG1(LF_GC, LL_INFO10, "GC Heap %p\n", hp);
47815+
STRESS_LOG1(LF_GC, LL_INFO1000, "GC Heap %p\n", hp);
4781647816
for (int n = max_generation; n >= 0; --n)
4781747817
{
4781847818
#ifndef USE_REGIONS
47819-
STRESS_LOG4(LF_GC, LL_INFO10, " Generation %d [%p, %p] cur = %p\n",
47819+
STRESS_LOG4(LF_GC, LL_INFO1000, " Generation %d [%p, %p] cur = %p\n",
4782047820
n,
4782147821
generation_allocation_start(generation_of(n)),
4782247822
generation_allocation_limit(generation_of(n)),
@@ -47826,7 +47826,7 @@ void gc_heap::descr_generations (const char* msg)
4782647826
heap_segment* seg = generation_start_segment(generation_of(n));
4782747827
while (seg)
4782847828
{
47829-
STRESS_LOG4(LF_GC, LL_INFO10, " Segment mem %p alloc = %p used %p committed %p\n",
47829+
STRESS_LOG4(LF_GC, LL_INFO1000, " Segment mem %p alloc = %p used %p committed %p\n",
4783047830
heap_segment_mem(seg),
4783147831
heap_segment_allocated(seg),
4783247832
heap_segment_used(seg),
@@ -48656,7 +48656,7 @@ void gc_heap::verify_heap (BOOL begin_gc_p)
4865648656
dprintf (2,("[%s]GC#%zu(%s): Verifying heap - begin",
4865748657
(begin_gc_p ? "BEG" : "END"),
4865848658
VolatileLoad(&settings.gc_index),
48659-
(settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC"))));
48659+
get_str_gc_type()));
4866048660
#else
4866148661
dprintf (2,("[%s]GC#%zu: Verifying heap - begin",
4866248662
(begin_gc_p ? "BEG" : "END"), VolatileLoad(&settings.gc_index)));
@@ -49063,7 +49063,7 @@ void gc_heap::verify_heap (BOOL begin_gc_p)
4906349063

4906449064
#ifdef BACKGROUND_GC
4906549065
dprintf (2, ("(%s)(%s)(%s) total_objects_verified is %zd, total_objects_verified_deep is %zd",
49066-
(settings.concurrent ? "BGC" : (gc_heap::background_running_p () ? "FGC" : "NGC")),
49066+
get_str_gc_type(),
4906749067
(begin_gc_p ? "BEG" : "END"),
4906849068
((current_c_gc_state == c_gc_state_planning) ? "in plan" : "not in plan"),
4906949069
total_objects_verified, total_objects_verified_deep));
@@ -49116,7 +49116,7 @@ void gc_heap::verify_heap (BOOL begin_gc_p)
4911649116
}
4911749117
dprintf (2,("GC%zu(%s): Verifying heap - end",
4911849118
VolatileLoad(&settings.gc_index),
49119-
(settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC"))));
49119+
get_str_gc_type()));
4912049120
#else
4912149121
dprintf (2,("GC#d: Verifying heap - end", VolatileLoad(&settings.gc_index)));
4912249122
#endif //BACKGROUND_GC
@@ -50881,6 +50881,15 @@ last_recorded_gc_info* gc_heap::get_completed_bgc_info()
5088150881
}
5088250882
#endif //BACKGROUND_GC
5088350883

50884+
const char* gc_heap::get_str_gc_type()
50885+
{
50886+
#ifdef BACKGROUND_GC
50887+
return (settings.concurrent ? "BGC" : (gc_heap::background_running_p () ? "FGC" : "NGC"));
50888+
#else // BACKGROUND_GC
50889+
return "NGC";
50890+
#endif // BACKGROUND_GC
50891+
}
50892+
5088450893
void gc_heap::do_pre_gc()
5088550894
{
5088650895
STRESS_LOG_GC_STACK;
@@ -50909,14 +50918,21 @@ void gc_heap::do_pre_gc()
5090950918
#ifdef TRACE_GC
5091050919
size_t total_allocated_since_last_gc[total_oh_count];
5091150920
get_total_allocated_since_last_gc (total_allocated_since_last_gc);
50912-
50921+
bool compatibleWithStressLog = true;
50922+
#ifdef SIMPLE_DPRINTF
50923+
compatibleWithStressLog = false;
50924+
#endif //SIMPLE_DPRINTF
50925+
bgc_state b_state = bgc_not_in_process;
5091350926
#ifdef BACKGROUND_GC
50927+
b_state = settings.b_state;
50928+
#endif //BACKGROUND_GC
50929+
5091450930
size_t heap_size_before = get_total_heap_size();
5091550931
uint64_t start_gc_time = GetHighPrecisionTimeStamp();
5091650932
uint64_t elapsed_since_last_gc_us = start_gc_time - last_alloc_reset_suspended_end_time;
5091750933
max_peak_heap_size = max (max_peak_heap_size, heap_size_before);
5091850934

50919-
dprintf (6666, (ThreadStressLog::gcDetailedStartMsg(),
50935+
dprintf (6666, (ThreadStressLog::gcDetailedStartMsg(compatibleWithStressLog),
5092050936
VolatileLoad(&settings.gc_index),
5092150937
dd_collection_count (hp->dynamic_data_of (0)),
5092250938
settings.condemned_generation,
@@ -50929,18 +50945,11 @@ void gc_heap::do_pre_gc()
5092950945
(elapsed_since_last_gc_us ? (total_allocated_since_last_gc[gc_oh_num::loh] / 1000.0 / elapsed_since_last_gc_us) : 0),
5093050946
total_allocated_since_last_gc[gc_oh_num::poh],
5093150947
(elapsed_since_last_gc_us ? (total_allocated_since_last_gc[gc_oh_num::poh] / 1000.0 / elapsed_since_last_gc_us) : 0),
50932-
(settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC")),
50933-
settings.b_state,
50934-
n_heaps,
50935-
(heap_size_before / 1000.0 / 1000.0),
50936-
(max_peak_heap_size / 1000.0 / 1000.0)));
50937-
#else
50938-
dprintf (1, ("*GC* %d(gen0:%d)(%d)(alloc: %zd)",
50939-
VolatileLoad(&settings.gc_index),
50940-
dd_collection_count(hp->dynamic_data_of(0)),
50941-
settings.condemned_generation,
50942-
(total_allocated_since_last_gc[0] + total_allocated_since_last_gc[1] + total_allocated_since_last_gc[2])));
50943-
#endif //BACKGROUND_GC
50948+
get_str_gc_type(),
50949+
b_state,
50950+
n_heaps
50951+
SIMPLE_DPRINTF_ARG(heap_size_before / 1000.0 / 1000.0)
50952+
SIMPLE_DPRINTF_ARG(max_peak_heap_size / 1000.0 / 1000.0)));
5094450953

5094550954
if (heap_hard_limit)
5094650955
{
@@ -51359,18 +51368,12 @@ void gc_heap::do_post_gc()
5135951368
}
5136051369
#endif //BGC_SERVO_TUNING
5136151370

51362-
#ifdef BACKGROUND_GC
51363-
const char* str_gc_type = (settings.concurrent ? "BGC" : (gc_heap::background_running_p () ? "FGC" : "NGC"));
51364-
#else
51365-
const char* str_gc_type = "NGC";
51366-
#endif //BACKGROUND_GC
51367-
5136851371
dprintf (6666, (ThreadStressLog::gcDetailedEndMsg(),
5136951372
VolatileLoad (&settings.gc_index),
5137051373
dd_collection_count (hp->dynamic_data_of (0)),
5137151374
(get_total_heap_size() / 1000.0 / 1000.0),
5137251375
settings.condemned_generation,
51373-
str_gc_type,
51376+
get_str_gc_type(),
5137451377
(settings.compaction ? "C" : "S"),
5137551378
(settings.promotion ? "P" : "S"),
5137651379
settings.entry_memory_load,

src/coreclr/gc/gc.h

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,23 @@ inline bool IsServerHeap()
349349
#define MAX_LONGPATH 1024
350350
#endif // MAX_LONGPATH
351351

352-
// #define TRACE_GC
352+
// TRACE_GC has two sub-modes: the standard VM stress log mechanism and
353+
// SIMPLE_DPRINTF, which is text output. By default, we enable TRACE_GC (not
354+
// SIMPLE_DPRINTF) for debug/checked builds so that we can catch build breaks.
355+
// HOST_64BIT is required because logging dprintf to the stress log is only
356+
// supported on 64 bit platforms. We could consider enabling it in release
357+
// builds and for more logging sites (see below for details) but are being
358+
// conservative about performance impact.
359+
//
360+
// Normal development time changes are to enable SIMPLE_DPRINTF here (which
361+
// will automatically set TRACE_GC) or to only enable TRACE_GC.
362+
353363
// #define SIMPLE_DPRINTF
354364

365+
#if defined(SIMPLE_DPRINTF) || (defined(_DEBUG) && defined(HOST_64BIT))
366+
#define TRACE_GC
367+
#endif // _DEBUG
368+
355369
#ifdef TRACE_GC
356370
#define MIN_CUSTOM_LOG_LEVEL 7
357371
#define SEG_REUSE_LOG_0 (MIN_CUSTOM_LOG_LEVEL)
@@ -380,10 +394,79 @@ HRESULT initialize_log_file();
380394
void flush_gc_log (bool);
381395
void GCLog (const char *fmt, ... );
382396
#define dprintf(l,x) {if ((l == 1) || (l == GTC_LOG)) {GCLog x;}}
397+
#define SIMPLE_DPRINTF_ARG(x) , x
398+
383399
#else //SIMPLE_DPRINTF
400+
384401
#ifdef HOST_64BIT
385-
#define dprintf(l,x) STRESS_LOG_VA(l,x);
402+
403+
// -------------------------------
404+
// Stress log / dprintf background
405+
// -------------------------------
406+
//
407+
// This code connects dprintf to the stress log mechanism. These machanisms
408+
// and their usage has evolved a bit separately over time, so there are some
409+
// rough edges here.
410+
//
411+
// The stress log mechanism has a LogFacility and a LogLevel. Facilities can be
412+
// chosen through DOTNET_LogFacility, and the facility is recorded in the
413+
// stress log. LogFacility is a bitmask. The GC only has a few bits reserved
414+
// in the bitmask, and most GC logging uses a single value (LF_GC, which is 0x1).
415+
//
416+
// The stress log logging level can be chosen through DOTNET_LogLevel. This
417+
// causes filtering at runtime, and the level is not recorded in the stress log.
418+
// The first argument to dprintf is similar, though it can record either a level
419+
// (values below 7) or a GC area (values starting with SEG_REUSE_LOG_0 above).
420+
// Developers often use StressLogAnalyzer to filter by this value at _analysis_
421+
// time, which doesn't match usual stress log usage.
422+
//
423+
// In practice, dprintf(1) and LL_INFO10 (which has the value 4) have been used
424+
// similarly on log messages. A dprintf(1) is generally called about a few times per
425+
// GC, and LL_INFO10 is "10 logs per small but not trivial run". Other values
426+
// have been audited. We could consider moving the GC values to be in line with
427+
// the rest of the runtime (change 1 to 4 to make room for errors/warnings, etc.)
428+
// or (to avoid churn) convert them by adding 3.
429+
//
430+
// To allow StressLogAnalyzer to use the GC level values, we abuse the stress
431+
// log LogLevel by storing the GC value in the upper 16 bits of LogLevel and
432+
// also settings LF_GC (0x1). This works because we don't enable other logging
433+
// when doing GC investigations. However, we don't want to do this by default
434+
// because setting the upper bits will cause GC logging to masquerade as non-GC
435+
// logging. For example, dprintf(3) would use (3 << 16) | LF_GC == 0x30001,
436+
// which is LF_ASSERT | LF_VERIFIER | LF_GC in other contexts.
437+
//
438+
// Lastly, we have GC logging for some very low level operations, so by default
439+
// we don't want to even have the check that logging is enabled for performance
440+
// reasons. Right now we are very conservative and only allow dprintf(1) to go
441+
// to the stress log in default builds, but we could consider allowing more in
442+
// the future.
443+
444+
// -----------------------------
445+
// Stress log / dprintf settings
446+
// -----------------------------
447+
//
448+
// (See above for details.)
449+
//
450+
// The following line works for normal debug/checked builds (where STRESS_LOG is
451+
// defined and SIMPLE_DPRINTF is not). All dprintf sites are checked for
452+
// compilation errors, yet all but those with level 1 can be statically
453+
// optimized away. In the future after more auditing, this could be expanded to
454+
// more levels.
455+
//
456+
// Note that zero is passed because STRESS_LOG_VA/LogMsg will add LF_GC and in
457+
// normal builds we don't want conflicts in the upper bits of LogFacility.
458+
#define dprintf(l,x) if (l == 1) {STRESS_LOG_VA(0,x);}
459+
460+
// For private builds where it is ok (and useful) to have conflicts in the upper
461+
// bits of LogFacility, more events can be allowed and the dprintf level can be
462+
// passed through. Usually this is going to be a GC investigation and the other
463+
// logging will be disabled, so the theoretical conflict won't happen in
464+
// practice. Note that in these examples, 'l' ("ell", not "one") is passed
465+
// rather than '0'.
466+
//#define dprintf(l,x) STRESS_LOG_VA(l,x);
386467
//#define dprintf(l,x) {if ((l <= 2) || (l == 6666)) {STRESS_LOG_VA(l,x);}}
468+
469+
#define SIMPLE_DPRINTF_ARG(x)
387470
#else //HOST_64BIT
388471
#error Logging dprintf to stress log on 32 bits platforms is not supported.
389472
#endif //HOST_64BIT

src/coreclr/gc/gcpriv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,8 @@ class gc_heap
15781578

15791579
private:
15801580

1581+
PER_HEAP_ISOLATED_METHOD const char* get_str_gc_type();
1582+
15811583
#ifdef TRACE_GC
15821584
PER_HEAP_METHOD void print_free_list (int gen, heap_segment* seg);
15831585
#endif // TRACE_GC

0 commit comments

Comments
 (0)