Skip to content

Commit 9a879d3

Browse files
authored
changes to help diag init failure related to regions and expose more region size related configs in runtimeconfig (#114944)
Lately I've seen a couple of customer reports where GC initialization failed to reserve the default 256 GiB of virtual memory for the regions range. To make this easier to diagnose, I've added an error for this case that is communicated to the host, so you would see something like this:
C:\temp>dotnet GCPerfSim.dll -tc 28 -tagb 50 -tlgb 2 -lohar 0 -sohsi 50 -lohsi 0 -pohsi 0 -sohpi 100 -lohpi 0 -sohfi 0 -lohfi 0 -pohfi 0 -allocType reference -testKind time
GC: Reserving 274877906944 bytes (256 GiB) for the regions range failed, do you have a virtual memory limit set on this process?
GC heap initialization failed with error 0x8007000E
Failed to create CoreCLR, HRESULT: 0x8007000E
I also added this error reporting in a few other places that we might hit, and got rid of some messages that are only for private testing. I'm not adding this for every single case where initialization could fail, as the remaining cases are really unlikely to be hit. Since the solution is to adjust some region configs, I also exposed them in the runtimeconfig, and I fixed a typo I had in a config used for private testing.
1 parent cec44d6 commit 9a879d3

File tree

4 files changed

+53
-13
lines changed

4 files changed

+53
-13
lines changed

src/coreclr/gc/gc.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,12 @@ float mb (size_t num)
427427
return (float)((float)num / 1000.0 / 1000.0);
428428
}
429429

430+
// Converts a size in bytes to whole GiB (1 GiB = 1024 * 1024 * 1024 bytes).
// This is integer division on size_t, so any fractional GiB is discarded
// (e.g. a value smaller than 1 GiB yields 0).
inline
431+
size_t gib (size_t num)
432+
{
433+
return (num / 1024 / 1024 / 1024);
434+
}
435+
430436
#ifdef BACKGROUND_GC
431437
uint32_t bgc_alloc_spin_count = 140;
432438
uint32_t bgc_alloc_spin_count_uoh = 16;
@@ -3908,6 +3914,10 @@ bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uin
39083914
*lowest = global_region_start;
39093915
*highest = global_region_end;
39103916
}
3917+
else
3918+
{
3919+
log_init_error_to_host ("global region allocator failed to allocate %zd bytes during init", (total_num_units * sizeof (uint32_t)));
3920+
}
39113921

39123922
return (unit_map != 0);
39133923
}
@@ -9467,6 +9477,7 @@ bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to)
94679477
succeed = virtual_commit (commit_begins[i], commit_sizes[i], recorded_committed_bookkeeping_bucket);
94689478
if (!succeed)
94699479
{
9480+
log_init_error_to_host ("Committing %zd bytes (%.3f mb) for GC bookkeeping element#%d failed", commit_sizes[i], mb (commit_sizes[i]), i);
94709481
failed_commit = i;
94719482
break;
94729483
}
@@ -9520,7 +9531,10 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
95209531
bookkeeping_start = mem;
95219532

95229533
if (!mem)
9534+
{
9535+
log_init_error_to_host ("Reserving %zd bytes (%.3f mb) for GC bookkeeping failed", alloc_size, mb (alloc_size));
95239536
return 0;
9537+
}
95249538

95259539
dprintf (2, ("Init - Card table alloc for %zd bytes: [%zx, %zx[",
95269540
alloc_size, (size_t)mem, (size_t)(mem+alloc_size)));
@@ -12398,6 +12412,7 @@ heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, gc_he
1239812412

1239912413
if (!virtual_commit (new_pages, initial_commit, oh, h_number))
1240012414
{
12415+
log_init_error_to_host ("Committing %zd bytes for a region failed", initial_commit);
1240112416
return 0;
1240212417
}
1240312418

@@ -14345,6 +14360,7 @@ bool allocate_initial_regions(int number_of_heaps)
1434514360
initial_regions = new (nothrow) uint8_t*[number_of_heaps][total_generation_count][2];
1434614361
if (initial_regions == nullptr)
1434714362
{
14363+
log_init_error_to_host ("allocate_initial_regions failed to allocate %zd bytes", (number_of_heaps * total_generation_count * 2 * sizeof (uint8_t*)));
1434814364
return false;
1434914365
}
1435014366
for (int i = 0; i < number_of_heaps; i++)
@@ -14407,7 +14423,6 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
1440714423

1440814424
if (gc_config_log == NULL)
1440914425
{
14410-
GCToEEInterface::LogErrorToHost("Cannot create log file");
1441114426
return E_FAIL;
1441214427
}
1441314428

@@ -14532,7 +14547,11 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
1453214547
size_t reserve_size = regions_range;
1453314548
uint8_t* reserve_range = (uint8_t*)virtual_alloc (reserve_size, use_large_pages_p);
1453414549
if (!reserve_range)
14550+
{
14551+
log_init_error_to_host ("Reserving %zd bytes (%zd GiB) for the regions range failed, do you have a virtual memory limit set on this process?",
14552+
reserve_size, gib (reserve_size));
1453514553
return E_OUTOFMEMORY;
14554+
}
1453614555

1453714556
if (!global_region_allocator.init (reserve_range, (reserve_range + reserve_size),
1453814557
((size_t)1 << min_segment_size_shr),
@@ -14545,7 +14564,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
1454514564
else
1454614565
{
1454714566
assert (!"cannot use regions without specifying the range!!!");
14548-
GCToEEInterface::LogErrorToHost("Cannot use regions without specifying the range (using DOTNET_GCRegionRange)");
14567+
log_init_error_to_host ("Regions range is 0! unexpected");
1454914568
return E_FAIL;
1455014569
}
1455114570
#else //USE_REGIONS
@@ -14685,7 +14704,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
1468514704

1468614705
if (!init_semi_shared())
1468714706
{
14688-
GCToEEInterface::LogErrorToHost("PER_HEAP_ISOLATED data members initialization failed");
14707+
log_init_error_to_host ("PER_HEAP_ISOLATED data members initialization failed");
1468914708
hres = E_FAIL;
1469014709
}
1469114710

@@ -49226,6 +49245,7 @@ HRESULT GCHeap::Initialize()
4922649245
memset (gc_heap::committed_by_oh, 0, sizeof (gc_heap::committed_by_oh));
4922749246
if (!gc_heap::compute_hard_limit())
4922849247
{
49248+
log_init_error_to_host ("compute_hard_limit failed, check your heap hard limit related configs");
4922949249
return CLR_E_GC_BAD_HARD_LIMIT;
4923049250
}
4923149251

@@ -49243,6 +49263,7 @@ HRESULT GCHeap::Initialize()
4924349263
uintptr_t config_affinity_mask = static_cast<uintptr_t>(GCConfig::GetGCHeapAffinitizeMask());
4924449264
if (!ParseGCHeapAffinitizeRanges(cpu_index_ranges_holder.Get(), &config_affinity_set, config_affinity_mask))
4924549265
{
49266+
log_init_error_to_host ("ParseGCHeapAffinitizeRange failed, check your HeapAffinitizeRanges config");
4924649267
return CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT;
4924749268
}
4924849269

@@ -49251,6 +49272,7 @@ HRESULT GCHeap::Initialize()
4925149272

4925249273
if (process_affinity_set->IsEmpty())
4925349274
{
49275+
log_init_error_to_host ("This process is affinitize to 0 CPUs, check your GC heap affinity related configs");
4925449276
return CLR_E_GC_BAD_AFFINITY_CONFIG;
4925549277
}
4925649278

@@ -49393,6 +49415,8 @@ HRESULT GCHeap::Initialize()
4939349415

4939449416
if (gc_region_size >= MAX_REGION_SIZE)
4939549417
{
49418+
log_init_error_to_host ("The GC RegionSize config is set to %zd bytes (%zd GiB), it needs to be < %zd GiB",
49419+
gc_region_size, gib (gc_region_size), gib (MAX_REGION_SIZE));
4939649420
return CLR_E_GC_BAD_REGION_SIZE;
4939749421
}
4939849422

@@ -49421,6 +49445,8 @@ HRESULT GCHeap::Initialize()
4942149445

4942249446
if (!power_of_two_p(gc_region_size) || ((gc_region_size * nhp * min_regions_per_heap) > gc_heap::regions_range))
4942349447
{
49448+
log_init_error_to_host ("Region size is %zd bytes, range is %zd bytes, (%d heaps * %d regions/heap = %d) regions needed initially",
49449+
gc_region_size, gc_heap::regions_range, nhp, min_regions_per_heap, (nhp * min_regions_per_heap));
4942449450
return E_OUTOFMEMORY;
4942549451
}
4942649452

@@ -49495,7 +49521,7 @@ HRESULT GCHeap::Initialize()
4949549521

4949649522
if (!WaitForGCEvent->CreateManualEventNoThrow(TRUE))
4949749523
{
49498-
GCToEEInterface::LogErrorToHost("Creation of WaitForGCEvent failed");
49524+
log_init_error_to_host ("Creation of WaitForGCEvent failed");
4949949525
return E_FAIL;
4950049526
}
4950149527

@@ -49584,12 +49610,10 @@ HRESULT GCHeap::Initialize()
4958449610
uint8_t* numa_mem = (uint8_t*)GCToOSInterface::VirtualReserve (hb_info_size_per_node, 0, 0, (uint16_t)numa_node_index);
4958549611
if (!numa_mem)
4958649612
{
49587-
GCToEEInterface::LogErrorToHost("Reservation of numa_mem failed");
4958849613
return E_FAIL;
4958949614
}
4959049615
if (!GCToOSInterface::VirtualCommit (numa_mem, hb_info_size_per_node, (uint16_t)numa_node_index))
4959149616
{
49592-
GCToEEInterface::LogErrorToHost("Commit of numa_mem failed");
4959349617
return E_FAIL;
4959449618
}
4959549619

@@ -49691,7 +49715,6 @@ HRESULT GCHeap::Initialize()
4969149715

4969249716
if (seg_mem == nullptr)
4969349717
{
49694-
GCToEEInterface::LogErrorToHost("STRESS_REGIONS couldn't allocate ro segment");
4969549718
hr = E_FAIL;
4969649719
break;
4969749720
}
@@ -49705,7 +49728,6 @@ HRESULT GCHeap::Initialize()
4970549728

4970649729
if (!RegisterFrozenSegment(&seg_info))
4970749730
{
49708-
GCToEEInterface::LogErrorToHost("STRESS_REGIONS failed to RegisterFrozenSegment");
4970949731
hr = E_FAIL;
4971049732
break;
4971149733
}

src/coreclr/gc/gc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,4 +392,6 @@ void GCLog (const char *fmt, ... );
392392
FILE* CreateLogFile(const GCConfigStringHolder& temp_logfile_name, bool is_config);
393393
#endif //TRACE_GC || GC_CONFIG_DRIVEN
394394

395+
void log_init_error_to_host (const char* format, ...);
396+
395397
#endif // __GC_H

src/coreclr/gc/gccommon.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,12 @@ FILE* CreateLogFile(const GCConfigStringHolder& temp_logfile_name, bool is_confi
132132
//_snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s.%d%s", temp_logfile_name.Get(), pid, suffix);
133133
_snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s%s", temp_logfile_name.Get(), suffix);
134134
logFile = fopen(logfile_name, "wb");
135+
136+
if (logFile == NULL)
137+
{
138+
log_init_error_to_host ("Cannot create log file %s", logfile_name);
139+
}
140+
135141
return logFile;
136142
}
137143
#endif //TRACE_GC || GC_CONFIG_DRIVEN
@@ -159,7 +165,6 @@ HRESULT initialize_log_file()
159165

160166
if (gc_log == NULL)
161167
{
162-
GCToEEInterface::LogErrorToHost("Cannot create log file");
163168
return E_FAIL;
164169
}
165170

@@ -168,7 +173,7 @@ HRESULT initialize_log_file()
168173

169174
if (gc_log_file_size <= 0 || gc_log_file_size > 500)
170175
{
171-
GCToEEInterface::LogErrorToHost("Invalid log file size (valid size needs to be larger than 0 and smaller than 500)");
176+
log_init_error_to_host ("Invalid log file size %zd MiB (valid size needs to be > 0 and <= 500 MiB)", gc_log_file_size);
172177
fclose (gc_log);
173178
return E_FAIL;
174179
}
@@ -265,4 +270,15 @@ void GCLog (const char *fmt, ... )
265270
}
266271
#endif //TRACE_GC && SIMPLE_DPRINTF
267272

273+
// We log initialization errors to the host to help with diagnostics. By default these will show up in stdout.
274+
// You can also redirect them to a file. See docs/design/features/host-tracing.md.
//
// format - printf-style format string; the variadic arguments that follow supply its values.
//
// The message is formatted into a fixed 256-char buffer on the stack and then handed to
// GCToEEInterface::LogErrorToHost as a single string.
275+
void log_init_error_to_host (const char* format, ...)
276+
{
277+
char error_buf[256];
278+
va_list args;
279+
va_start (args, format);
280+
// _TRUNCATE is passed so that a message that does not fit in error_buf is expected to be
// cut off to the buffer size instead of being treated as an error.
// NOTE(review): confirm this holds for the _vsnprintf_s implementation used on non-Windows builds.
_vsnprintf_s (error_buf, ARRAY_SIZE (error_buf), _TRUNCATE, format, args);
281+
GCToEEInterface::LogErrorToHost (error_buf);
282+
va_end (args);
283+
}
268284
#endif // !DACCESS_COMPILE

src/coreclr/gc/gcconfig.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ class GCConfigStringHolder
104104
INT_CONFIG (GCHeapHardLimit, "GCHeapHardLimit", "System.GC.HeapHardLimit", 0, "Specifies a hard limit for the GC heap") \
105105
INT_CONFIG (GCHeapHardLimitPercent, "GCHeapHardLimitPercent", "System.GC.HeapHardLimitPercent", 0, "Specifies the GC heap usage as a percentage of the total memory") \
106106
INT_CONFIG (GCTotalPhysicalMemory, "GCTotalPhysicalMemory", NULL, 0, "Specifies what the GC should consider to be total physical memory") \
107-
INT_CONFIG (GCRegionRange, "GCRegionRange", NULL, 0, "Specifies the range for the GC heap") \
108-
INT_CONFIG (GCRegionSize, "GCRegionSize", NULL, 0, "Specifies the size for a basic GC region") \
107+
INT_CONFIG (GCRegionRange, "GCRegionRange", "System.GC.RegionRange", 0, "Specifies the range for the GC heap") \
108+
INT_CONFIG (GCRegionSize, "GCRegionSize", "System.GC.RegionSize", 0, "Specifies the size for a basic GC region") \
109109
INT_CONFIG (GCEnableSpecialRegions, "GCEnableSpecialRegions", NULL, 0, "Specifies to enable special handling some regions like SIP") \
110110
STRING_CONFIG(LogFile, "GCLogFile", NULL, "Specifies the name of the GC log file") \
111111
STRING_CONFIG(ConfigLogFile, "GCConfigLogFile", NULL, "Specifies the name of the GC config log file") \
@@ -142,7 +142,7 @@ class GCConfigStringHolder
142142
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \
143143
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \
144144
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \
145-
INT_CONFIG (GCDBGCRatio, " GCDBGCRatio", NULL, 0, "Specifies the ratio of BGC to NGC2 for HC change") \
145+
INT_CONFIG (GCDBGCRatio, "GCDBGCRatio", NULL, 0, "Specifies the ratio of BGC to NGC2 for HC change") \
146146
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")
147147

148148
// This class is responsible for retreiving configuration information

0 commit comments

Comments
 (0)