Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 30b5f0e

Browse files
authored
Disable GC Coop mode switching during fatal error handling during GC exception (#17710) (#17844)
1 parent 8746b88 commit 30b5f0e

File tree

3 files changed

+69
-5
lines changed

3 files changed

+69
-5
lines changed

src/vm/ceemain.cpp

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2919,6 +2919,7 @@ static void TerminateIPCManager(void)
29192919
// Impl for UtilLoadStringRC Callback: In VM, we let the thread decide culture
29202920
// copy culture name into szBuffer and return length
29212921
// ---------------------------------------------------------------------------
2922+
extern BOOL g_fFatalErrorOccuredOnGCThread;
29222923
static HRESULT GetThreadUICultureNames(__inout StringArrayList* pCultureNames)
29232924
{
29242925
CONTRACTL
@@ -2941,7 +2942,23 @@ static HRESULT GetThreadUICultureNames(__inout StringArrayList* pCultureNames)
29412942

29422943
Thread * pThread = GetThread();
29432944

2944-
if (pThread != NULL) {
2945+
// When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang
2946+
// indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not
2947+
// getting localized with a non-default thread-specific culture.
2948+
// A canonical stack trace that gets here is a fatal error in the GC that comes through:
2949+
// coreclr.dll!GetThreadUICultureNames
2950+
// coreclr.dll!CCompRC::LoadLibraryHelper
2951+
// coreclr.dll!CCompRC::LoadLibrary
2952+
// coreclr.dll!CCompRC::GetLibrary
2953+
// coreclr.dll!CCompRC::LoadString
2954+
// coreclr.dll!CCompRC::LoadString
2955+
// coreclr.dll!SString::LoadResourceAndReturnHR
2956+
// coreclr.dll!SString::LoadResourceAndReturnHR
2957+
// coreclr.dll!SString::LoadResource
2958+
// coreclr.dll!EventReporter::EventReporter
2959+
// coreclr.dll!EEPolicy::LogFatalError
2960+
// coreclr.dll!EEPolicy::HandleFatalError
2961+
if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread) {
29452962

29462963
// Switch to cooperative mode, since we'll be looking at managed objects
29472964
// and we don't want them moving on us.
@@ -3071,8 +3088,24 @@ static int GetThreadUICultureId(__out LocaleIDValue* pLocale)
30713088

30723089
Thread * pThread = GetThread();
30733090

3074-
if (pThread != NULL) {
3075-
3091+
// When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang
3092+
// indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not
3093+
// getting localized with a non-default thread-specific culture.
3094+
// A canonical stack trace that gets here is a fatal error in the GC that comes through:
3095+
// coreclr.dll!GetThreadUICultureNames
3096+
// coreclr.dll!CCompRC::LoadLibraryHelper
3097+
// coreclr.dll!CCompRC::LoadLibrary
3098+
// coreclr.dll!CCompRC::GetLibrary
3099+
// coreclr.dll!CCompRC::LoadString
3100+
// coreclr.dll!CCompRC::LoadString
3101+
// coreclr.dll!SString::LoadResourceAndReturnHR
3102+
// coreclr.dll!SString::LoadResourceAndReturnHR
3103+
// coreclr.dll!SString::LoadResource
3104+
// coreclr.dll!EventReporter::EventReporter
3105+
// coreclr.dll!EEPolicy::LogFatalError
3106+
// coreclr.dll!EEPolicy::HandleFatalError
3107+
if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread)
3108+
{
30763109
// Switch to cooperative mode, since we'll be looking at managed objects
30773110
// and we don't want them moving on us.
30783111
GCX_COOP();
@@ -3130,7 +3163,24 @@ static int GetThreadUICultureId(__out LocaleIDValue* pLocale)
31303163

31313164
Thread * pThread = GetThread();
31323165

3133-
if (pThread != NULL) {
3166+
// When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang
3167+
// indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not
3168+
// getting localized with a non-default thread-specific culture.
3169+
// A canonical stack trace that gets here is a fatal error in the GC that comes through:
3170+
// coreclr.dll!GetThreadUICultureNames
3171+
// coreclr.dll!CCompRC::LoadLibraryHelper
3172+
// coreclr.dll!CCompRC::LoadLibrary
3173+
// coreclr.dll!CCompRC::GetLibrary
3174+
// coreclr.dll!CCompRC::LoadString
3175+
// coreclr.dll!CCompRC::LoadString
3176+
// coreclr.dll!SString::LoadResourceAndReturnHR
3177+
// coreclr.dll!SString::LoadResourceAndReturnHR
3178+
// coreclr.dll!SString::LoadResource
3179+
// coreclr.dll!EventReporter::EventReporter
3180+
// coreclr.dll!EEPolicy::LogFatalError
3181+
// coreclr.dll!EEPolicy::HandleFatalError
3182+
if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread)
3183+
{
31343184

31353185
// Switch to cooperative mode, since we'll be looking at managed objects
31363186
// and we don't want them moving on us.

src/vm/eepolicy.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,8 @@ inline void DoLogForFailFastException(LPCWSTR pszMessage, PEXCEPTION_POINTERS pE
12161216
EX_END_CATCH(SwallowAllExceptions)
12171217
}
12181218

1219+
//This starts FALSE and then converts to true if HandleFatalError has ever been called by a GC thread
1220+
BOOL g_fFatalErrorOccuredOnGCThread = FALSE;
12191221
//
12201222
// Log an error to the event log if possible, then throw up a dialog box.
12211223
//
@@ -1349,7 +1351,7 @@ void EEPolicy::LogFatalError(UINT exitCode, UINT_PTR address, LPCWSTR pszMessage
13491351
//Give a managed debugger a chance if this fatal error is on a managed thread.
13501352
Thread *pThread = GetThread();
13511353

1352-
if (pThread)
1354+
if (pThread && !g_fFatalErrorOccuredOnGCThread)
13531355
{
13541356
GCX_COOP();
13551357

@@ -1491,6 +1493,9 @@ void DECLSPEC_NORETURN EEPolicy::HandleFatalStackOverflow(EXCEPTION_POINTERS *pE
14911493
UNREACHABLE();
14921494
}
14931495

1496+
1497+
1498+
14941499
void DECLSPEC_NORETURN EEPolicy::HandleFatalError(UINT exitCode, UINT_PTR address, LPCWSTR pszMessage /* = NULL */, PEXCEPTION_POINTERS pExceptionInfo /* = NULL */, LPCWSTR errorSource /* = NULL */, LPCWSTR argExceptionString /* = NULL */)
14951500
{
14961501
WRAPPER_NO_CONTRACT;
@@ -1526,6 +1531,14 @@ void DECLSPEC_NORETURN EEPolicy::HandleFatalError(UINT exitCode, UINT_PTR addres
15261531
// All of the code from here on out is robust to any failures in any API's that are called.
15271532
CONTRACT_VIOLATION(GCViolation | ModeViolation | SOToleranceViolation | FaultNotFatal | TakesLockViolation);
15281533

1534+
1535+
// Setting g_fFatalErrorOccuredOnGCThread allows code to avoid attempting to make GC mode transitions which could
1536+
// block indefinately if the fatal error occured during the GC.
1537+
if (IsGCSpecialThread() && GCHeapUtilities::IsGCInProgress())
1538+
{
1539+
g_fFatalErrorOccuredOnGCThread = TRUE;
1540+
}
1541+
15291542
// ThreadStore lock needs to be released before continuing with the FatalError handling should
15301543
// because debugger is going to take CrstDebuggerMutex, whose lock level is higher than that of
15311544
// CrstThreadStore. It should be safe to release the lock since execution will not be resumed

src/vm/vars.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ EXTERN BOOL g_fComStarted;
528528
//
529529
GVAL_DECL(DWORD, g_fEEShutDown);
530530
EXTERN DWORD g_fFastExitProcess;
531+
EXTERN BOOL g_fFatalErrorOccurredOnGCThread;
531532
#ifndef DACCESS_COMPILE
532533
EXTERN BOOL g_fSuspendOnShutdown;
533534
EXTERN BOOL g_fSuspendFinalizerOnShutdown;

0 commit comments

Comments
 (0)